Take-Home Exercise 3

pacman::p_load(jsonlite, tidygraph, ggraph, 
               visNetwork, graphlayouts, ggforce, writexl, 
               skimr, tidytext, tidyverse, igraph,RColorBrewer)
# Load the raw MC3 challenge file; the code below reads its `links` and
# `nodes` elements.
mc3_data <- fromJSON("data/MC3.json")

# Build the edge table: one row per (source, target, type) combination with a
# `weights` column holding the number of rows collapsed into it.
mc3_edges <- as_tibble(mc3_data$links) %>%
  # NOTE(review): distinct() runs before the count, so exact duplicate rows
  # never contribute to `weights` -- confirm this is intended.
  distinct() %>%
  mutate(across(c(source, target, type), as.character)) %>%
  # Drop self-loops (rows whose source and target are the same entity).
  filter(source != target) %>%
  # count() returns an ungrouped tibble, so no "grouped output" message is
  # emitted and no trailing ungroup() is required.
  count(source, target, type, name = "weights")
`summarise()` has grouped output by 'source', 'target'. You can override using
the `.groups` argument.
# Node table: keep the five attributes used downstream and coerce each one to
# a plain atomic type. Values of `revenue_omu` that cannot be parsed as a
# number become NA (R reports this with a coercion warning).
mc3_nodes <- mc3_data$nodes %>%
  as_tibble() %>%
  select(id, country, type, revenue_omu, product_services) %>%
  mutate(
    across(c(id, country, type, product_services), as.character),
    revenue_omu = as.numeric(as.character(revenue_omu))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `revenue_omu = as.numeric(as.character(revenue_omu))`.
Caused by warning:
! NAs introduced by coercion
# Interactive preview of the edge table.
DT::datatable(data = mc3_edges)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
# Interactive preview of the node table.
DT::datatable(data = mc3_nodes)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
# Split each node's `product_services` text into one token per row, stored in
# a new `word` column (the other node columns are repeated for every token).
token_nodes <- mc3_nodes %>%
  unnest_tokens(output = word, input = product_services)

# Remove tokens that appear in tidytext's `stop_words` lexicon. The join key
# is stated explicitly so the result does not depend on which columns the two
# tables happen to share, and no "Joining with ..." message is emitted.
stopwords_removed <- token_nodes %>%
  anti_join(stop_words, by = "word")
Joining with `by = join_by(word)`
# Frequency table of the remaining tokens, most frequent first.
unique_words <- stopwords_removed %>%
  count(word, sort = TRUE)

# Export the word counts for manual inspection. A project-relative path is
# used (same `data/` folder the MC3 JSON is read from) so the script does not
# depend on one user's absolute directory layout.
write_xlsx(unique_words, file.path("data", "words.xlsx"))
# Collect every identifier that appears at either end of an edge.
id1 <- mc3_edges %>%
  select(id = source)
id2 <- mc3_edges %>%
  select(id = target)

# Node table restricted to entities that take part in at least one edge.
# Attributes come from `mc3_nodes`; ids with no matching node row keep NA
# attributes. The join key is explicit so no "Joining with ..." message is
# emitted; `unmatched = "drop"` was the default and has been omitted.
mc3_nodes1 <- bind_rows(id1, id2) %>%
  distinct() %>%
  left_join(mc3_nodes, by = "id")
Joining with `by = join_by(id)`
# Assemble the directed tidygraph object from the prepared edge and node
# tables.
mc3_graph <- tbl_graph(
  edges = mc3_edges,
  nodes = mc3_nodes1,
  directed = TRUE
)
# Extract the graph's edge table as a plain tibble.
# as_tibble() replaces as.tibble(), which was deprecated in tibble 2.0.0.
edges_df <- mc3_graph %>%
  activate(edges) %>%
  as_tibble()
Warning: `as.tibble()` was deprecated in tibble 2.0.0.
ℹ Please use `as_tibble()` instead.
ℹ The signature and semantics have changed, see `?as_tibble`.
# Extract the graph's node table as a plain tibble with two columns:
#   id    - the node's row position (integer),
#   label - the original entity identifier.
# as_tibble() replaces as.tibble(), which was deprecated in tibble 2.0.0.
nodes_df <- mc3_graph %>%
  activate(nodes) %>%
  as_tibble() %>%
  select(label = id) %>%
  mutate(id = row_number(), .before = label)
# Rebuild the network as an undirected igraph object; vertices are taken from
# `nodes_df` and edges from `edges_df`.
g <- graph_from_data_frame(
  d = edges_df,
  vertices = nodes_df,
  directed = FALSE
)
# Print a summary of the graph.
g
IGRAPH d572a7d UN-- 37324 24036 -- 
+ attr: name (v/c), label (v/c), type (e/c), weights (e/n)
+ edges from d572a7d (vertex names):
 [1] 1 --16060 1 --16061 2 --16062 3 --16063 4 --16064 4 --16065 5 --16066
 [8] 5 --16067 5 --16068 5 --16069 7 --16070 8 --16071 9 --16072 10--16073
[15] 11--16074 11--16075 11--16076 12--16077 13--16078 13--16079 13--16080
[22] 13--16081 13--16082 14--16083 14--16084 14--16085 15--16086 16--16087
[29] 16--16088 16--16089 16--16090 16--16091 17--16092 17--16093 18--16094
[36] 18--16095 18--16096 18--16097 18--16098 19--16099 20--16100 20--16101
[43] 22--16102 23--16103 23--16104 23--16105 23--16106 23--16107 23--16108
[50] 23--16109 23--16110 23--16111 23--16112 23--16113 23--16114 23--16115
+ ... omitted several edges
# Degree (number of incident edges) of every vertex in `g`, returned as a
# vector named by vertex name.
degree_centrality <- degree(g)

# Attach each node's degree to the node table. Vertex names in `g` come from
# the first column of `nodes_df` (`id`), so the lookup must use `id`; looking
# up by `label` finds no matching names and fills the column with NA.
nodes_df$degree_centrality <-
  unname(degree_centrality[as.character(nodes_df$id)])

# Show the six highest-degree vertices.
head(sort(degree_centrality, decreasing = TRUE))
13903  3483  9501  9903 14267  2053 
  120    91    72    65    62    54 
# 22-step orange palette, reversed so the first entry is the darkest shade.
mc3_colors_centrality <- rev(colorRampPalette(brewer.pal(9, "Oranges"))(22))

# degree_rank: 1 for the largest `degree_centrality` value in `nodes_df`,
#   increasing as the value decreases (tied values share a rank). The node
#   count is taken from the data via n() instead of a hard-coded constant.
# color.background: ranks are binned proportionally onto the palette so every
#   node receives a colour; indexing the palette directly by rank yields NA
#   for any rank beyond the palette length.
nodes_df <- nodes_df %>%
  mutate(
    degree_rank = n() + 1 - floor(rank(degree_centrality)),
    color.background = mc3_colors_centrality[
      ceiling(degree_rank * length(mc3_colors_centrality) / n())
    ]
  )
# Interactive network view of the degree-centrality result.
network_degree <- visNetwork(
  nodes = nodes_df,
  edges = edges_df,
  height = "350px",
  width = "100%",
  main = "Degree Centrality"
) %>%
  # Fixed seed so the layout is reproducible between renders.
  visLayout(randomSeed = 21) %>%
  # Highlight a node's neighbours on click, and offer drop-down selection by
  # node id and by degree rank.
  visOptions(
    highlightNearest = TRUE,
    nodesIdSelection = TRUE,
    selectedBy = "degree_rank"
  ) %>%
  # Navigation controls; edges are hidden while the view is being dragged.
  visInteraction(
    hideEdgesOnDrag = TRUE,
    dragNodes = TRUE,
    dragView = TRUE,
    zoomView = TRUE,
    navigationButtons = TRUE
  )

# Render the widget.
network_degree